Pascal Super Library

home *** CD-ROM | disk | FTP | other *** search

/ Pascal Super Library / Pascal Super Library (CW International)(1997).bin / COMPILER / LYSRC / YACC.Y < prev next >

Wrap

Text File | 1993-01-23 | 22KB | 812 lines

/* YACC.Y: Yacc grammar for Yacc main program. 2-17-91, 4-30-91 AG To bootstrap Yacc, use Yacc iself to compile this grammar, then run tpc on the generated program. Note: This is not entirely the `official' syntax introduced by Johnson, but it should be compatible with UNIX Yacc (except for the differences specified in the program header, below), as described in the UNIX manual, including the language elements entitled as "old features supported but not encouraged." Bugs: - Processes $$'s, $i's, %} and } inside of comments in Turbo Pascal code (instead of ignoring them). Shift/reduce conflicts: This grammar will produce a number of shift/reduce conflicts caused by the error productions, since it does not specify unambigiously whether errors are to be handled in global structures (definitions and rules) or by enclosed syntactic constructs (e.g. symbols). Yacc will resolve these conflicts in favour of shift, which is o.k. (it means that errors will be caught in the innermost constructs with error handling, thus reducing the amount of skipped symbols in resynchronization). Error handling is done using the general method of Schreiner/Friedman (see Schreiner/Friedman, "Introduction to compiler construction with UNIX," 1985). */ %{ {$X+} program Yacc; (* Copyright (c) 1990,91 by Albert Graef, Schillerstr. 18, 6509 Schornsheim/Germany All rights reserved *) uses YaccLib, YaccBase, YaccMsgs, YaccSem, YaccTables, YaccParseTable; (* TP Yacc - Yet Another Compiler Compiler for MS-DOS/Turbo Pascal Version 3.0 as of April 91 Version 3.0a as of May 92 (bug fixes in precedence and type information updates) Author Albert Graef Schillerstr. 18 6509 Schornsheim/Germany Graef@DMZRZU71.bitnet Synopsis YACC [options] yacc-file[.Y] [output-file[.PAS]] Options /v "Verbose:" Yacc generates a readable description of the generated parser, written to yacc-file with new extension .LST. /d "Debug:" Yacc generates parser with debugging output. Description This is a reimplementation of the popular UNIX compiler generator Yacc for MS-DOS and Turbo Pascal. Differences from UNIX Yacc: - Produces output code for Turbo Pascal, rather than for C. - Does not support %union definitions. Instead, a value type is declared by specifying the type identifier *itself* as the tag of a %token or %type definition. Yacc will automatically generate an appropriate yylval variable of a variant record type (YYSType) which is capable of holding values of any of the types used in %token and %type. Type checking is *very* strict. If you use type definitions, then any symbol referred to in an action *must* have a type introduced in a type definition. Either the symbol must have been assigned a type in the definitions section, or the $<type-identifier> notation must be used. The syntax of the %type definition has been changed slightly to allow definitions of the form %type <type-identifier> (omitting the nonterminals) which may be used to declare types which are not assigned to any grammar symbol, but are used with the $<...> construct. - The parse tables constructed by this Yacc version are slightly greater than those constructed by UNIX Yacc, since a reduce action will only be chosen as the default action if it is the *only* action in the state. In difference, UNIX Yacc chooses a reduce action as the default action whenever it is the only *reduce* action of the state (even if there are other shift actions). This solves a bug in UNIX Yacc that makes the generated parser start error recovery too late with certain types of error productions (see also Schreiner/Friedman, "Introduction to compiler construction with UNIX," 1985). Also, errors will be caught sooner in most cases where standard Yacc would carry out an additional (default) reduction before detecting the error. *) %} /* Lexical part of the Yacc language: */ %token ID /* identifiers: {letter}{letter_or_digit}* */ C_ID /* identifier which forms left side of rule, i.e. is followed by a colon */ LITERAL /* single character literal */ LITID /* multiple character literal */ NUMBER /* nonnegative integers: {digit}+ */ PTOKEN PLEFT PRIGHT PNONASSOC PTYPE PSTART PPREC /* reserved words: PTOKEN=%token, etc. */ PP /* source sections separator %% */ LCURL /* curly braces: %{ and %} */ RCURL ',' ':' ';' '|' '{' '}' '<' '>' '=' /* literals */ ILLEGAL /* illegal input character */ %start grammar %% /* Lexical entities, those that may give rise to syntax errors are augmented with error productions, and important symbols call yyerrok. */ id : ID c_id : C_ID literal : LITERAL litid : LITID number : NUMBER ptoken : PTOKEN { yyerrok; } pleft : PLEFT { yyerrok; } pright : PRIGHT { yyerrok; } pnonassoc : PNONASSOC { yyerrok; } ptype : PTYPE { yyerrok; } pstart : PSTART { yyerrok; } pprec : PPREC pp : PP { yyerrok; } lcurl : LCURL rcurl : RCURL | error { error(rcurl_expected); } comma : ',' colon : ':' { yyerrok; } semicolon : ';' { yyerrok; } bar : '|' { yyerrok; } lbrace : '{' rbrace : '}' | error { error(rbrace_expected); } langle : '<' rangle : '>' | error { error(rangle_expected); } eq : '=' /* Syntax and semantic routines: */ grammar : defs pp { sort_types; definitions; next_section; } rules { next_section; generate_parser; next_section; } aux_procs ; aux_procs : /* empty: aux_procs is optional */ | pp { copy_rest_of_file; } ; defs : /* empty */ | defs def { yyerrok; } | defs error { error(error_in_def); } ; def : pstart id { startnt := ntsym($2); } | pstart error { error(ident_expected); } | lcurl { copy_code; } rcurl | ptoken { act_prec := 0; } tag token_list | pleft { act_prec := new_prec_level(left); } tag token_list | pright { act_prec := new_prec_level(right); } tag token_list | pnonassoc { act_prec := new_prec_level(nonassoc); } tag token_list | ptype tag nonterm_list | ptype tag ; tag : /* empty: type tag is optional */ { act_type := 0; } | langle id rangle { act_type := $2; add_type($2); } ; token_list : token_num | token_list token_num { yyerrok; } | token_list comma token_num { yyerrok; } | error { error(ident_expected); } | token_list error { error(error_in_def); } | token_list comma error { error(ident_expected); } ; token_num : literal { if act_type<>0 then sym_type^[$1] := act_type; if act_prec<>0 then sym_prec^[$1] := act_prec; } | litid { litsym($1, 0); if act_type<>0 then sym_type^[litsym($1, 0)] := act_type; if act_prec<>0 then sym_prec^[litsym($1, 0)] := act_prec; } | id { litsym($1, 0); if act_type<>0 then sym_type^[litsym($1, 0)] := act_type; if act_prec<>0 then sym_prec^[litsym($1, 0)] := act_prec; } | litid number { litsym($1, 0); if act_type<>0 then sym_type^[litsym($1, $2)] := act_type; if act_prec<>0 then sym_prec^[litsym($1, 0)] := act_prec; } | id number { litsym($1, 0); if act_type<>0 then sym_type^[litsym($1, $2)] := act_type; if act_prec<>0 then sym_prec^[litsym($1, 0)] := act_prec; } ; nonterm_list : nonterm | nonterm_list nonterm { yyerrok; } | nonterm_list comma nonterm { yyerrok; } | error { error(ident_expected); } | nonterm_list error { error(error_in_def); } | nonterm_list comma error { error(ident_expected); } ; nonterm : id { if act_type<>0 then sym_type^[ntsym($1)] := act_type; } ; rules : { next_section; } rule1 | lcurl { copy_code; } rcurl { next_section; } rule1 /* rules section may be prefixed with `local' Turbo Pascal declarations */ | rules rule { yyerrok; } | error { error(error_in_rule); } | rules error { error(error_in_rule); } ; rule1 : c_id { start_rule(ntsym($1)); } colon { start_body; } body prec { end_body; } ; rule : rule1 | bar { start_body; } body prec { end_body; } ; body : /* empty */ | body literal { add_symbol($2); yyerrok; } | body litid { add_symbol(sym($2)); yyerrok; } | body id { add_symbol(sym($2)); yyerrok; } | body action { add_action; yyerrok; } | body error { error(error_in_rule); } ; action : lbrace { copy_action; } rbrace | eq { copy_single_action; } /* old language feature; code must be single statement ending with `;' */ ; prec : /* empty */ | pprec literal { add_rule_prec($2); } opt_action | pprec litid { add_rule_prec(litsym($2, 0)); } opt_action | pprec id { add_rule_prec(litsym($2, 0)); } opt_action | prec semicolon ; opt_action : /* empty */ | action { add_action; } ; %% (* Lexical analyzer (implemented in Turbo Pascal for maximum efficiency): *) function yylex : integer; function end_of_input : boolean; begin end_of_input := (cno>length(line)) and eof(yyin) end(*end_of_input*); procedure scan; (* scan for nonempty character, skip comments *) procedure scan_comment; var p : integer; begin p := pos('*/', copy(line, cno, length(line))); if p>0 then cno := cno+succ(p) else begin while (p=0) and not eof(yyin) do begin readln(yyin, line); inc(lno); p := pos('*/', line) end; if p=0 then begin cno := succ(length(line)); error(open_comment_at_eof); end else cno := succ(succ(p)) end end(*scan_comment*); begin while not end_of_input do if cno<=length(line) then case line[cno] of ' ', tab : inc(cno); '/' : if (cno<length(line)) and (line[succ(cno)]='*') then begin inc(cno, 2); scan_comment end else exit else exit end else begin readln(yyin, line); inc(lno); cno := 1; end end(*scan*); function scan_ident : integer; (* scan an identifier *) var idstr : String; begin idstr := line[cno]; inc(cno); while (cno<=length(line)) and ( ('A'<=upCase(line[cno])) and (upCase(line[cno])<='Z') or ('0'<=line[cno]) and (line[cno]<='9') or (line[cno]='_') or (line[cno]='.') ) do begin idstr := idstr+line[cno]; inc(cno) end; yylval := get_key(idstr); scan; if not end_of_input and (line[cno]=':') then scan_ident := C_ID else scan_ident := ID end(*scan_ident*); function scan_literal: integer; (* scan a literal, i.e. string *) var idstr : String; oct_val : Byte; begin idstr := line[cno]; inc(cno); while (cno<=length(line)) and (line[cno]<>idstr[1]) do if line[cno]='\' then if cno<length(line) then begin inc(cno); case line[cno] of 'n' : begin idstr := idstr+nl; inc(cno) end; 'r' : begin idstr := idstr+cr; inc(cno) end; 't' : begin idstr := idstr+tab; inc(cno) end; 'b' : begin idstr := idstr+bs; inc(cno) end; 'f' : begin idstr := idstr+ff; inc(cno) end; '0'..'7' : begin oct_val := ord(line[cno])-ord('0'); inc(cno); while (cno<=length(line)) and ('0'<=line[cno]) and (line[cno]<='7') do begin oct_val := oct_val*8+ord(line[cno])-ord('0'); inc(cno) end; idstr := idstr+chr(oct_val) end else begin idstr := idstr+line[cno]; inc(cno) end end end else inc(cno) else begin idstr := idstr+line[cno]; inc(cno) end; if cno>length(line) then error(missing_string_terminator) else inc(cno); if length(idstr)=2 then begin yylval := ord(idstr[2]); scan_literal := LITERAL; end else if length(idstr)>1 then begin yylval := get_key(''''+copy(idstr, 2, pred(length(idstr)))+''''); scan_literal := LITID; end else scan_literal := ILLEGAL; end(*scan_literal*); function scan_num : integer; (* scan an unsigned integer *) var numstr : String; code : integer; begin numstr := line[cno]; inc(cno); while (cno<=length(line)) and ('0'<=line[cno]) and (line[cno]<='9') do begin numstr := numstr+line[cno]; inc(cno) end; val(numstr, yylval, code); if code=0 then scan_num := NUMBER else scan_num := ILLEGAL; end(*scan_num*); function scan_keyword : integer; (* scan %xy *) function lookup(key : String; var tok : integer) : boolean; (* table of Yacc keywords (unstropped): *) const no_of_entries = 11; max_entry_length = 8; keys : array [1..no_of_entries] of String[max_entry_length] = ( '0', '2', 'binary', 'left', 'nonassoc', 'prec', 'right', 'start', 'term', 'token', 'type'); toks : array [1..no_of_entries] of integer = ( PTOKEN, PNONASSOC, PNONASSOC, PLEFT, PNONASSOC, PPREC, PRIGHT, PSTART, PTOKEN, PTOKEN, PTYPE); var m, n, k : integer; begin (* binary search: *) m := 1; n := no_of_entries; lookup := true; while m<=n do begin k := m+(n-m) div 2; if key=keys[k] then begin tok := toks[k]; exit end else if key>keys[k] then m := k+1 else n := k-1 end; lookup := false end(*lookup*); var keywstr : String; tok : integer; begin inc(cno); if cno<=length(line) then case line[cno] of '<' : begin scan_keyword := PLEFT; inc(cno) end; '>' : begin scan_keyword := PRIGHT; inc(cno) end; '=' : begin scan_keyword := PPREC; inc(cno) end; '%', '\' : begin scan_keyword := PP; inc(cno) end; '{' : begin scan_keyword := LCURL; inc(cno) end; '}' : begin scan_keyword := RCURL; inc(cno) end; 'A'..'Z', 'a'..'z', '0'..'9' : begin keywstr := line[cno]; inc(cno); while (cno<=length(line)) and ( ('A'<=upCase(line[cno])) and (upCase(line[cno])<='Z') or ('0'<=line[cno]) and (line[cno]<='Z') ) do begin keywstr := keywstr+line[cno]; inc(cno) end; if lookup(keywstr, tok) then scan_keyword := tok else scan_keyword := ILLEGAL end; else scan_keyword := ILLEGAL end else scan_keyword := ILLEGAL; end(*scan_keyword*); function scan_char : integer; (* scan any single character *) begin scan_char := ord(line[cno]); inc(cno) end(*scan_char*); var lno0, cno0 : integer; begin tokleng := 0; scan; lno0 := lno; cno0 := cno; if end_of_input then yylex := 0 else case line[cno] of 'A'..'Z', 'a'..'z', '_' : yylex := scan_ident; '''', '"' : yylex := scan_literal; '0'..'9' : yylex := scan_num; '%', '\' : yylex := scan_keyword; '=' : if (cno<length(line)) and (line[succ(cno)]='{') then begin inc(cno); yylex := scan_char end else yylex := scan_char; else yylex := scan_char; end; if lno=lno0 then tokleng := cno-cno0 end(*yylex*); (* Main program: *) var i : Integer; begin (* sign-on: *) writeln(sign_on); (* parse command line: *) if paramCount=0 then begin writeln(usage); writeln(options); halt(0); end; yfilename := ''; pasfilename := ''; for i := 1 to paramCount do if copy(paramStr(i), 1, 1)='/' then if upper(paramStr(i))='/V' then verbose := true else if upper(paramStr(i))='/D' then debug := true else begin writeln(invalid_option, paramStr(i)); halt(1); end else if yfilename='' then yfilename := addExt(upper(paramStr(i)), 'Y') else if pasfilename='' then pasfilename := addExt(upper(paramStr(i)), 'PAS') else begin writeln(illegal_no_args); halt(1); end; if yfilename='' then begin writeln(illegal_no_args); halt(1); end; if pasfilename='' then pasfilename := root(yfilename)+'.PAS'; lstfilename := root(yfilename)+'.LST'; (* open files: *) assign(yyin, yfilename); assign(yyout, pasfilename); assign(yylst, lstfilename); reset(yyin); if ioresult<>0 then fatal(cannot_open_file+yfilename); rewrite(yyout); if ioresult<>0 then fatal(cannot_open_file+pasfilename); rewrite(yylst); if ioresult<>0 then fatal(cannot_open_file+lstfilename); (* search code template in current directory, then on path where Yacc was executed from: *) codfilename := 'YYPARSE.COD'; assign(yycod, codfilename); reset(yycod); if ioresult<>0 then begin codfilename := upper(path(paramStr(0)))+'YYPARSE.COD'; assign(yycod, codfilename); reset(yycod); if ioresult<>0 then fatal(cannot_open_file+codfilename); end; (* parse source grammar: *) write('parse ... '); lno := 0; cno := 1; line := ''; next_section; if debug then writeln(yyout, '{$define yydebug}'); if yyparse=0 then { done } else if yychar=0 then error(unexpected_eof) else error(syntax_error); if errors=0 then writeln('DONE'); (* close files: *) close(yyin); close(yyout); close(yylst); close(yycod); (* print statistics: *) if errors>0 then writeln( lno, ' lines, ', errors, ' errors found.' ) else begin writeln( lno, ' lines, ', n_rules-1, '/', max_rules-1, ' rules, ', n_states, '/', max_states, ' s, ', n_items, '/', max_items, ' i, ', n_trans, '/', max_trans, ' t, ', n_redns, '/', max_redns, ' r.'); if shift_reduce>0 then writeln(shift_reduce, ' shift/reduce conflicts.'); if reduce_reduce>0 then writeln(reduce_reduce, ' reduce/reduce conflicts.'); if never_reduced>0 then writeln(never_reduced, ' rules never reduced.'); end; if warnings>0 then writeln(warnings, ' warnings.'); writeln( n_bytes, '/', max_bytes, ' bytes of memory used.'); (* terminate: *) if errors>0 then begin erase(yyout); if ioresult<>0 then ; end; if file_size(lstfilename)=0 then erase(yylst) else writeln('(see ', lstfilename, ' for more information)'); halt(errors); end(*Yacc*).